WGCNA GO Analysis

sessionInfo() #provides list of loaded packages and version of R. 
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.35     R6_2.5.1          fastmap_1.1.1     xfun_0.43        
##  [5] cachem_1.0.8      knitr_1.45        htmltools_0.5.8.1 rmarkdown_2.26   
##  [9] lifecycle_1.0.4   cli_3.6.2         sass_0.4.9        jquerylib_0.1.4  
## [13] compiler_4.3.2    rstudioapi_0.16.0 tools_4.3.2       evaluate_0.23    
## [17] bslib_0.7.0       yaml_2.3.8        rlang_1.1.3       jsonlite_1.8.8
#BiocManager::install("simplifyEnrichment")

First, load the necessary packages.

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(tidyr)
library(grDevices)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
## 
##     rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:geneLenDataBase':
## 
##     unfactor
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
## 
##     desc
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: XML
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
## 
##     addNode
## The following object is masked from 'package:circlize':
## 
##     degree
## The following object is masked from 'package:plyr':
## 
##     join
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
## The following objects are masked from 'package:zoo':
## 
##     yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(stringr)
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
## 
##     boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)
library(simplifyEnrichment)
## 
## ========================================
## simplifyEnrichment version 1.10.0
## Bioconductor page: https://bioconductor.org/packages/simplifyEnrichment/
## Github page: https://github.com/jokergoo/simplifyEnrichment
## Documentation: https://jokergoo.github.io/simplifyEnrichment/
## Examples: https://simplifyenrichment.github.io/
## 
## If you use it in published research, please cite:
## Gu, Z. simplifyEnrichment: an R/Bioconductor package for Clustering and 
##   Visualizing Functional Enrichment Results, Genomics, Proteomics & 
##   Bioinformatics 2022.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(simplifyEnrichment))
## ========================================

Load in data

library(rtracklayer)
gff<-rtracklayer::import("../../data/Pocillopora_acuta_HIv2.genes_fixed.gff3")
gff<-as.data.frame(gff) %>% dplyr::select(-Parent)
dim(gff) # 478988     9
## [1] 478988     12
names(gff) 
##  [1] "seqnames"      "start"         "end"           "width"        
##  [5] "strand"        "source"        "type"          "score"        
##  [9] "phase"         "ID"            "transcript_id" "gene_id"
transcripts <- subset(gff, type == "transcript")
transcripts_gr <- makeGRangesFromDataFrame(transcripts, keep.extra.columns=TRUE) #extract length information
transcript_lengths <- width(transcripts_gr) #isolate length of each gene
seqnames<-transcripts_gr$ID #extract list of gene id 
lengths<-cbind(seqnames, transcript_lengths)
lengths<-as.data.frame(lengths) #convert to data frame

dim(transcripts) #33730    13
## [1] 33730    12
kegg <- read.delim("../../data/Pocillopora_acuta_HIv2.genes.KEGG_results.txt",header = FALSE)
kegg <- as.data.frame(kegg)
colnames(kegg)[1] <- "gene_id" 
colnames(kegg)[2] <- "KEGG_new"
head(kegg)
##                                      gene_id KEGG_new
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a         
## 2 Pocillopora_acuta_HIv2___RNAseq.g24143.t1b   K22584
## 3  Pocillopora_acuta_HIv2___RNAseq.g22918.t1         
## 4  Pocillopora_acuta_HIv2___RNAseq.g18333.t1   K03386
## 5   Pocillopora_acuta_HIv2___RNAseq.g7985.t1         
## 6  Pocillopora_acuta_HIv2___RNAseq.g13511.t1
eggnog<-read.delim("../../data/Pocillopora_acuta_HIv2.genes.EggNog_results.txt")#this file contains all of the go terms, descriptions, kegg, etc
eggnog<- plyr::rename(eggnog, c("X.query"="gene_id"))
head(eggnog,2)
##                                      gene_id  seed_ortholog    evalue score
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a 45351.EDO48725 2.16e-120   364
## 2  Pocillopora_acuta_HIv2___RNAseq.g18333.t1 45351.EDO38694 3.18e-123   355
##                                                                           eggNOG_OGs
## 1 COG0620@1|root,KOG2263@2759|Eukaryota,38GZS@33154|Opisthokonta,3BNKS@33208|Metazoa
## 2 COG0450@1|root,KOG0852@2759|Eukaryota,38B9P@33154|Opisthokonta,3BGS4@33208|Metazoa
##   max_annot_lvl COG_category
## 1 33208|Metazoa            E
## 2 33208|Metazoa            O
##                                           Description Preferred_name
## 1    Cobalamin-independent synthase, Catalytic domain              -
## 2 negative regulation of male germ cell proliferation          PRDX4
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         GOs
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 2 GO:0000003,GO:0001775,GO:0002252,GO:0002263,GO:0002274,GO:0002275,GO:0002283,GO:0002366,GO:0002376,GO:0002443,GO:0002444,GO:0002446,GO:0003006,GO:0003674,GO:0003824,GO:0004601,GO:0005488,GO:0005515,GO:0005575,GO:0005576,GO:0005615,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005790,GO:0005829,GO:0006082,GO:0006457,GO:0006464,GO:0006468,GO:0006520,GO:0006575,GO:0006793,GO:0006796,GO:0006807,GO:0006810,GO:0006887,GO:0006915,GO:0006950,GO:0006952,GO:0006955,GO:0006979,GO:0007154,GO:0007165,GO:0007249,GO:0007252,GO:0007275,GO:0007276,GO:0007283,GO:0007548,GO:0008150,GO:0008152,GO:0008219,GO:0008285,GO:0008379,GO:0008406,GO:0008584,GO:0009056,GO:0009266,GO:0009409,GO:0009605,GO:0009607,GO:0009617,GO:0009628,GO:0009636,GO:0009893,GO:0009966,GO:0009967,GO:0009987,GO:0010467,GO:0010604,GO:0010646,GO:0010647,GO:0010941,GO:0010942,GO:0010950,GO:0010952,GO:0012501,GO:0012505,GO:0016043,GO:0016192,GO:0016209,GO:0016310,GO:0016491,GO:0016684,GO:0016999,GO:0017001,GO:0017144,GO:0019222,GO:0019471,GO:0019538,GO:0019725,GO:0019752,GO:0019953,GO:0022414,GO:0022417,GO:0023051,GO:0023052,GO:0023056,GO:0030141,GO:0030162,GO:0030198,GO:0031323,GO:0031325,GO:0031410,GO:0031974,GO:0031982,GO:0031983,GO:0032268,GO:0032270,GO:0032501,GO:0032502,GO:0032504,GO:0032940,GO:0033554,GO:0034774,GO:0035556,GO:0036211,GO:0036230,GO:0042119,GO:0042127,GO:0042221,GO:0042592,GO:0042737,GO:0042742,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0042981,GO:0043062,GO:0043065,GO:0043067,GO:0043068,GO:0043085,GO:0043170,GO:0043207,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043233,GO:0043280,GO:0043281,GO:0043299,GO:0043312,GO:0043412,GO:0043436,GO:0043900,GO:0043901,GO:0044093,GO:0044237,GO:0044238,GO:0044248,GO:0044260,GO:0044267,GO:0044281,GO:0044421,GO:0044422,GO:0044424,GO:0044433,GO:0044444,GO:0044446,GO:0044464,GO:0044703,GO:0045055,GO:0045137,GO:0045321,GO:0045454,GO:0045862,GO:0046425,GO:0046427,GO:0046546,GO:0046661,GO:0046903,GO:0046983,GO:0048232,GO:0048513,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048608,GO:0048609,GO:0048731,GO:0048856,GO:0050789,GO:0050790,GO:0050794,GO:0050896,GO:0051171,GO:0051173,GO:0051179,GO:0051186,GO:0051187,GO:0051234,GO:0051239,GO:0051241,GO:0051246,GO:0051247,GO:0051336,GO:0051345,GO:0051604,GO:0051704,GO:0051707,GO:0051716,GO:0051920,GO:0052547,GO:0052548,GO:0055114,GO:0060205,GO:0060255,GO:0061458,GO:0065007,GO:0065008,GO:0065009,GO:0070013,GO:0070417,GO:0070887,GO:0071704,GO:0071840,GO:0072593,GO:0080090,GO:0097190,GO:0097237,GO:0097708,GO:0098542,GO:0098754,GO:0098869,GO:0099503,GO:0101002,GO:1901564,GO:1901605,GO:1902531,GO:1902533,GO:1904813,GO:1904892,GO:1904894,GO:1905936,GO:1905937,GO:1990748,GO:2000116,GO:2000241,GO:2000242,GO:2000254,GO:2000255,GO:2001056,GO:2001233,GO:2001235,GO:2001267,GO:2001269
##          EC   KEGG_ko
## 1  2.1.1.14 ko:K00549
## 2 1.11.1.15 ko:K03386
##                                                                           KEGG_Pathway
## 1 ko00270,ko00450,ko01100,ko01110,ko01230,map00270,map00450,map01100,map01110,map01230
## 2                                                                     ko04214,map04214
##   KEGG_Module KEGG_Reaction             KEGG_rclass
## 1      M00017 R04405,R09365 RC00035,RC00113,RC01241
## 2           -             -                       -
##                             BRITE KEGG_TC CAZy BiGG_Reaction
## 1 ko00000,ko00001,ko00002,ko01000       -    -             -
## 2 ko00000,ko00001,ko01000,ko04147       -    -             -
##                 PFAMs
## 1         Meth_synt_2
## 2 1-cysPrx_C,AhpC-TSA
gogene <- merge(transcripts, eggnog, by=c("gene_id"), all=T)
gogene <- merge(gogene, kegg, by=c("gene_id"), all=T)
head(gogene,2)
##                                   gene_id                           seqnames
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t Pocillopora_acuta_HIv2___Sc0000013
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t Pocillopora_acuta_HIv2___Sc0000013
##     start     end width strand   source       type score.x phase
## 1 4542087 4551503  9417      + AUGUSTUS transcript      NA    NA
## 2 4639103 4647350  8248      + AUGUSTUS transcript      NA    NA
##                                        ID
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t
##                             transcript_id       seed_ortholog   evalue score.y
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t      45351.EDO27354 2.41e-93     317
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t 6087.XP_002166004.2 1.28e-38     164
##                                                                           eggNOG_OGs
## 1 COG0666@1|root,KOG0510@2759|Eukaryota,38G7Q@33154|Opisthokonta,3BCDU@33208|Metazoa
## 2                                              COG0666@1|root,KOG4177@2759|Eukaryota
##    max_annot_lvl COG_category                                Description
## 1  33208|Metazoa           DZ osmolarity-sensing cation channel activity
## 2 2759|Eukaryota            I                           spectrin binding
##   Preferred_name
## 1          TRPA1
## 2              -
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         GOs
## 1 GO:0000302,GO:0001580,GO:0002791,GO:0002793,GO:0003008,GO:0003012,GO:0003674,GO:0004888,GO:0005034,GO:0005215,GO:0005216,GO:0005217,GO:0005244,GO:0005245,GO:0005261,GO:0005262,GO:0005488,GO:0005515,GO:0005575,GO:0005623,GO:0005886,GO:0005887,GO:0006810,GO:0006811,GO:0006812,GO:0006816,GO:0006873,GO:0006874,GO:0006875,GO:0006936,GO:0006939,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007166,GO:0007204,GO:0007600,GO:0007602,GO:0007606,GO:0007610,GO:0007638,GO:0008150,GO:0008324,GO:0009266,GO:0009314,GO:0009408,GO:0009409,GO:0009410,GO:0009416,GO:0009453,GO:0009581,GO:0009582,GO:0009583,GO:0009593,GO:0009605,GO:0009612,GO:0009628,GO:0009636,GO:0009719,GO:0009966,GO:0009967,GO:0009987,GO:0010033,GO:0010035,GO:0010037,GO:0010243,GO:0010378,GO:0010646,GO:0010647,GO:0010817,GO:0014070,GO:0014074,GO:0014832,GO:0014848,GO:0015075,GO:0015085,GO:0015267,GO:0015276,GO:0015278,GO:0015318,GO:0016020,GO:0016021,GO:0016043,GO:0016048,GO:0016324,GO:0019233,GO:0019722,GO:0019725,GO:0019932,GO:0022607,GO:0022803,GO:0022832,GO:0022834,GO:0022836,GO:0022838,GO:0022839,GO:0022843,GO:0022857,GO:0022890,GO:0023041,GO:0023051,GO:0023052,GO:0023056,GO:0030001,GO:0030003,GO:0030424,GO:0031000,GO:0031224,GO:0031226,GO:0031644,GO:0031646,GO:0032024,GO:0032421,GO:0032501,GO:0032879,GO:0032880,GO:0032991,GO:0033554,GO:0033555,GO:0034220,GO:0034605,GO:0034702,GO:0034703,GO:0035556,GO:0035690,GO:0035774,GO:0036270,GO:0038023,GO:0040011,GO:0040040,GO:0042221,GO:0042330,GO:0042331,GO:0042391,GO:0042493,GO:0042542,GO:0042592,GO:0042752,GO:0042802,GO:0042995,GO:0043005,GO:0043052,GO:0043269,GO:0043270,GO:0043279,GO:0043933,GO:0044057,GO:0044070,GO:0044085,GO:0044425,GO:0044459,GO:0044464,GO:0045177,GO:0046677,GO:0046873,GO:0046883,GO:0046887,GO:0046957,GO:0048265,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048878,GO:0050708,GO:0050714,GO:0050789,GO:0050794,GO:0050796,GO:0050801,GO:0050848,GO:0050850,GO:0050877,GO:0050896,GO:0050906,GO:0050907,GO:0050909,GO:0050912,GO:0050913,GO:0050951,GO:0050954,GO:0050955,GO:0050960,GO:0050961,GO:0050965,GO:0050966,GO:0050968,GO:0050974,GO:0050982,GO:0051046,GO:0051047,GO:0051049,GO:0051050,GO:0051179,GO:0051209,GO:0051222,GO:0051223,GO:0051234,GO:0051239,GO:0051240,GO:0051259,GO:0051260,GO:0051262,GO:0051282,GO:0051283,GO:0051289,GO:0051480,GO:0051606,GO:0051641,GO:0051649,GO:0051716,GO:0051930,GO:0051931,GO:0051969,GO:0052129,GO:0055065,GO:0055074,GO:0055080,GO:0055082,GO:0055085,GO:0060089,GO:0060341,GO:0060401,GO:0060402,GO:0061178,GO:0065003,GO:0065007,GO:0065008,GO:0070201,GO:0070417,GO:0070588,GO:0070838,GO:0070887,GO:0071241,GO:0071244,GO:0071310,GO:0071312,GO:0071313,GO:0071407,GO:0071415,GO:0071417,GO:0071466,GO:0071495,GO:0071840,GO:0071944,GO:0072347,GO:0072503,GO:0072507,GO:0072511,GO:0090087,GO:0090276,GO:0090277,GO:0097458,GO:0097553,GO:0097603,GO:0097604,GO:0098590,GO:0098655,GO:0098660,GO:0098662,GO:0098771,GO:0098796,GO:0098862,GO:0098900,GO:0098908,GO:0099094,GO:0099604,GO:0120025,GO:1901698,GO:1901699,GO:1901700,GO:1901701,GO:1902495,GO:1902531,GO:1902533,GO:1903522,GO:1903530,GO:1903532,GO:1903793,GO:1904058,GO:1904951,GO:1990351,GO:1990760
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
##   EC   KEGG_ko     KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass
## 1  - ko:K04984 ko04750,map04750           -             -           -
## 2  -         -                -           -             -           -
##                     BRITE                                 KEGG_TC CAZy
## 1 ko00000,ko00001,ko04040 1.A.4.6.1,1.A.4.6.2,1.A.4.6.3,1.A.4.6.5    -
## 2                       -                                       -    -
##   BiGG_Reaction                           PFAMs KEGG_new
## 1             - Ank,Ank_2,Ank_3,Ank_4,Ion_trans   K04984
## 2             - Ank,Ank_2,Ank_4,Ank_5,Ion_trans   K04984
dim(gogene)
## [1] 33730    33
geneInfo <- read.csv("../../output/WGCNA/WGCNA_ModuleMembership.csv") #this file was generated from the WGCNA analyses and contains the modules of interest
geneInfo<- plyr::rename(geneInfo, c("X"="gene_id"))
dim(geneInfo) # there are 9012 genes in our gene info file
## [1] 9012   40
geneInfo <- merge(gogene, geneInfo, by=c("gene_id")) #merging the GO and Kegg info to module membership for the 9012 genes

Format GO terms to remove dashes and quotes and separate by semicolons (replace , with ;) in GOs column

geneInfo$GOs <- gsub(",", ";", geneInfo$GOs)
geneInfo$GOs <- gsub('"', "", geneInfo$GOs)
geneInfo$GOs <- gsub("-", NA, geneInfo$GOs)

geneInfo$KEGG_new[geneInfo$KEGG_new == ""] <- NA
unique(geneInfo$moduleColor)
##  [1] "green"        "blue"         "salmon"       "turquoise"    "yellow"      
##  [6] "black"        "red"          "magenta"      "lightcyan"    "purple"      
## [11] "brown"        "pink"         "midnightblue" "tan"          "cyan"
geneInfo$Length<-lengths$transcript_lengths[match(geneInfo$gene_id, lengths$seqnames)]
dim(geneInfo)
## [1] 9012   73
write.csv(geneInfo, file = "../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv") #gene info for reference/supplement

Modules

calc_up_mods <- c("brown", "red", "black", "pink", "salmon", "blue")

nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")) #942
## [1] 942
nrow(geneInfo %>% dplyr::filter(moduleColor=="red")) #425
## [1] 425
nrow(geneInfo %>% filter(moduleColor=="black")) #396
## [1] 396
nrow(geneInfo %>% filter(moduleColor=="pink")) #220
## [1] 220
nrow(geneInfo %>% filter(moduleColor=="salmon")) #154
## [1] 154
nrow(geneInfo %>% filter(moduleColor=="blue")) #1989
## [1] 1989
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")), nrow(geneInfo %>% dplyr::filter(moduleColor=="red")), nrow(geneInfo %>% filter(moduleColor=="black")), nrow(geneInfo %>% filter(moduleColor=="pink")), nrow(geneInfo %>% filter(moduleColor=="salmon")), nrow(geneInfo %>% filter(moduleColor=="blue")))
## [1] 4126
# 4126

calc_down_mods <- c("turquoise","magenta","lightcyan")

nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")) #2558
## [1] 2558
nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")) #219
## [1] 219
nrow(geneInfo %>% filter(moduleColor=="lightcyan")) #65
## [1] 65
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")), nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")), nrow(geneInfo %>% filter(moduleColor=="lightcyan")))
## [1] 2842
# 2842

other_mods <- c("green","yellow", "purple", "midnightblue","cyan","tan")

sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="green")), nrow(geneInfo %>% dplyr::filter(moduleColor=="yellow")), nrow(geneInfo %>% filter(moduleColor=="purple")), nrow(geneInfo %>% filter(moduleColor=="midnightblue")), nrow(geneInfo %>% filter(moduleColor=="cyan")),nrow(geneInfo %>% filter(moduleColor=="tan")))
## [1] 2044
# 2044

# 4126 + 2842 + 2044 = 9012, which represents all of our genes

Calcification - Up

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)

4126 genes are in the 6 modules significantly upregulated by calcification.

### Generate vector with names in just the module we are analyzing
# ID.vector <- geneInfo %>%
#   filter(moduleColor==c("brown", "red", "black", "pink", "salmon", "green")) %>%
#   #get_rows(.data[[module]]))%>%
#   pull(gene_id)

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("brown", "red", "black", "pink", "salmon", "blue")) %>%
  pull(gene_id)

length(ID.vector) #4126
## [1] 4126
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

Run the GOSeq function by module color to test for GO term enrichment. Due to high number of enriched GO terms, I am using a p-value threshold of <0.05 and using rrvgo package to reduce redundancy in list of GO terms.

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results 
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
GO_05 <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
go_results_BP <- GO_05 %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 654   8
head(go_results_BP)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0050794            8.436918e-08                1.0000000       1395
## 2 2 GO:0065007            1.553493e-06                0.9999989       1574
## 3 3 GO:0050789            1.584882e-06                0.9999989       1479
## 4 4 GO:0048523            1.581326e-05                0.9999878        800
## 5 5 GO:0048518            3.043369e-05                0.9999758        950
## 6 6 GO:0051336            3.403563e-05                0.9999783        232
##   numInCat                                      term ontology
## 1     2785            regulation of cellular process       BP
## 2     3184                     biological regulation       BP
## 3     2981          regulation of biological process       BP
## 4     1569   negative regulation of cellular process       BP
## 5     1888 positive regulation of biological process       BP
## 6      415          regulation of hydrolase activity       BP

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")

#compare_clustering_methods(Mat, plot_type = "heatmap")

pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 654 terms by 'binary_cut'... 184 clusters, used 1.835802 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 6 GO lists...
dev.off()
## quartz_off_screen 
##                 2
CalcUpMods_GO_P05 <- go_results_BP %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcUpMods_GO_P05

ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05.pdf", CalcUpMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
 #calculate similarity 
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 508  10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_reduced$ParentTerm))
## [1] 73

The reduced list of terms is 508 terms that falls under 73 parent terms.

write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")

Plot reduced terms

#plot significantly enriched GO terms by Slim Category faceted by slim term 
 GO.plot_calcification <-  ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) + 
    geom_point(aes(size=over_represented_pvalue)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
GO.plot_calcification

ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification.pdf", plot=GO.plot_calcification, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcUpMods_GO_P05_reduced <- go_results_BP_reduced %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcUpMods_GO_P05_reduced

ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05_reduced.pdf", CalcUpMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)

Calcification - Up - By Module

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)

Brown

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("brown")) %>%
  pull(gene_id)

length(ID.vector) #942
## [1] 942
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector #set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_brown <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Brown")

go_results_BP <- GO_05_brown %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 206   8

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_brown.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50),  min_term = 3)
## Cluster 206 terms by 'binary_cut'... 73 clusters, used 0.358979 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 11 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Red

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("red")) %>%
  pull(gene_id)

length(ID.vector) #425
## [1] 425
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_red <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Red")

go_results_BP <- GO_05_red %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 209   8
#Write file of results 
write.csv(GO_05_red, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_red.csv")

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_red.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 209 terms by 'binary_cut'... 147 clusters, used 0.9499679 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 9 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Black

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("black")) %>%
  pull(gene_id)

length(ID.vector) #396
## [1] 396
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_black <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Black")

go_results_BP <- GO_05_black %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 395   8

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_black.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 395 terms by 'binary_cut'... 281 clusters, used 3.392079 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Pink

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("pink")) %>%
  pull(gene_id)

length(ID.vector) #220
## [1] 220
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_pink <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Pink")

go_results_BP <- GO_05_pink %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 234   8

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_pink.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 234 terms by 'binary_cut'... 125 clusters, used 0.522738 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 6 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Salmon

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("salmon")) %>%
  pull(gene_id)

length(ID.vector) #154
## [1] 154
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_salmon <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Salmon")

go_results_BP <- GO_05_salmon %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 198   8

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_salmon.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 198 terms by 'binary_cut'... 79 clusters, used 0.2580299 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 10 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Blue

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("blue")) %>%
  pull(gene_id)

length(ID.vector) #1989
## [1] 1989
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_blue <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Blue")

go_results_BP <- GO_05_blue %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 1530    8

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_blue.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50))
## Cluster 1530 terms by 'binary_cut'... 419 clusters, used 16.81366 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 7 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Combining all the individual module lists for Calcification - Up

GO_05_UP <- rbind(GO_05_brown,GO_05_red,GO_05_black,GO_05_pink,GO_05_salmon,GO_05_blue)

length(GO_05_UP$GOterm) #3810 enriched GO terms
## [1] 3810
length(unique(GO_05_UP$GOterm)) #3617 enriched GO terms
## [1] 3617
# Collapse GO terms that are duplicated accross modules - sort by GO term and p-value, then keep the row with the lowest p-value, collapsing the module names so you know which modules were represented

# Identify and collapse duplicates
GO_05_UP <- GO_05_UP %>%
  arrange(GOterm, over_represented_pvalue) %>%
  group_by(GOterm) %>% mutate(Module = paste(unique(Module), collapse = ";")) %>%
  group_by(GOterm) %>%
  slice_min(order_by = over_represented_pvalue, n = 1) %>%
  ungroup()

length(GO_05_UP$GOterm) #3617 enriched GO terms
## [1] 3617
length(unique(GO_05_UP$GOterm)) #3617 enriched GO terms, all are unique
## [1] 3617
#Write file of results 
write.csv(GO_05_UP, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_up_bymod.csv")

Calcification - Down

2842 genes are in the 4 modules significantly downregulated by calcification.

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)
### Generate vector with names in just the module we are analyzing
ID.vector <- geneInfo%>%
  filter(moduleColor %in% c("turquoise","magenta","lightcyan"))%>%
  pull(gene_id)

length(ID.vector) #2842
## [1] 2842
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

### Run the GOSeq function by module color to test for GO term enrichment. Due to high number of enriched GO terms, I am using a p-value threshold of <0.05 and using rrvgo package to reduce redundancy in list of GO terms.

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results 
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
GO_05 <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
go_results_BP <- GO_05 %>%
      filter(ontology=="BP") %>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat > 10) %>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 298   8
head(go_results_BP)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0002181            8.295294e-15                        1         67
## 2 2 GO:0006614            2.463649e-13                        1         48
## 3 3 GO:0006412            3.096109e-13                        1        123
## 4 4 GO:0043043            1.598313e-12                        1        126
## 5 5 GO:0006613            2.273026e-12                        1         49
## 6 6 GO:0006518            7.543937e-12                        1        146
##   numInCat                                                        term ontology
## 1       90                                     cytoplasmic translation       BP
## 2       59 SRP-dependent cotranslational protein targeting to membrane       BP
## 3      219                                                 translation       BP
## 4      231                                peptide biosynthetic process       BP
## 5       63               cotranslational protein targeting to membrane       BP
## 6      285                                   peptide metabolic process       BP

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 298 terms by 'binary_cut'... 87 clusters, used 0.564898 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 4 GO lists...
dev.off()
## quartz_off_screen 
##                 2
CalcDownMods_GO_P05 <- go_results_BP %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcDownMods_GO_P05

ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05.pdf", CalcDownMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
 #calculate similarity 
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 226  10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_reduced$ParentTerm))
## [1] 38

The reduced list of terms is 226 terms that falls under 38 parent terms.

write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#plot significantly enriched GO terms by Slim Category faceted by slim term 
 GO.plot_calcification_down <-  ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) + 
    geom_point(aes(size=over_represented_pvalue)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
GO.plot_calcification_down

ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification_down.pdf", plot=GO.plot_calcification_down, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcDownMods_GO_P05_reduced <- go_results_BP_reduced %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcDownMods_GO_P05_reduced

ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05_reduced.pdf", CalcDownMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)

Calcification - Down - By Module

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)

Turquoise

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("turquoise")) %>%
  pull(gene_id)

length(ID.vector) #2558
## [1] 2558
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector #set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_turquoise <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Turquoise")

go_results_BP <- GO_05_turquoise %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 382   8

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_turquoise.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 382 terms by 'binary_cut'... 115 clusters, used 0.902597 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Magenta

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("magenta")) %>%
  pull(gene_id)

length(ID.vector) #219
## [1] 219
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_magenta <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Magenta")

go_results_BP <- GO_05_magenta %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 210   8

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_magenta.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 210 terms by 'binary_cut'... 147 clusters, used 0.779767 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 7 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Lightcyan

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("lightcyan")) %>%
  pull(gene_id)

length(ID.vector) #65
## [1] 65
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_lightcyan <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Lightcyan")

go_results_BP <- GO_05_lightcyan %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 129   8

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_lightcyan.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 129 terms by 'binary_cut'... 82 clusters, used 0.257122 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Combining all the individual module lists for Calcification - Down

GO_05_DOWN <- rbind(GO_05_turquoise,GO_05_magenta,GO_05_lightcyan)

length(GO_05_DOWN$GOterm) #1072 enriched GO terms
## [1] 1072
length(unique(GO_05_DOWN$GOterm)) #1070 enriched GO terms
## [1] 1070
# Collapse GO terms that are duplicated accross modules - sort by GO term and p-value, then keep the row with the lowest p-value, collapsing the module names so you know which modules were represented

# Identify and collapse duplicates
GO_05_DOWN <- GO_05_DOWN %>%
  arrange(GOterm, over_represented_pvalue) %>%
  group_by(GOterm) %>% mutate(Module = paste(unique(Module), collapse = ";")) %>%
  group_by(GOterm) %>%
  slice_min(order_by = over_represented_pvalue, n = 1) %>%
  ungroup()

#Write file of results 
write.csv(GO_05_DOWN, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_bymod.csv")

Merge the frequency of GOterms for both up- and down-regulation of calcification

Reduce the lists together

# GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
# GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")

GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_up_bymod.csv") %>% dplyr::select(-"X")
GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_bymod.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_05_up %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP_up)
## [1] 2647    8
go_results_BP_down <- GO_05_down %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP_down)
## [1] 721   8
all_enriched_terms_up_down <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm)
length(all_enriched_terms_up_down)
## [1] 3368
length(unique(all_enriched_terms_up_down))
## [1] 3217

By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules.

library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down),
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity 
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores = "size",
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 2004   10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 148
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 85
#write.csv(go_results_BP_up_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced
cal_up_terms <- cal_up_terms %>% mutate(Factor = "Up") 
head(cal_up_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0050794            7.575558e-23                        1        823
## 2 GO:0065007            9.035173e-22                        1        912
## 3 GO:0050789            1.200453e-20                        1        859
## 4 GO:0048522            2.736399e-18                        1        531
## 5 GO:0048518            2.733516e-17                        1        574
## 6 GO:0009987            1.373221e-16                        1       1074
##   numInCat                                      term ontology Module
## 1     2785            regulation of cellular process       BP   Blue
## 2     3184                     biological regulation       BP   Blue
## 3     2981          regulation of biological process       BP   Blue
## 4     1705   positive regulation of cellular process       BP   Blue
## 5     1888 positive regulation of biological process       BP   Blue
## 6     4026                          cellular process       BP   Blue
##                                                  ParentTerm Factor
## 1                            regulation of cellular process     Up
## 2                            regulation of cellular process     Up
## 3                            regulation of cellular process     Up
## 4 positive regulation of transcription by RNA polymerase II     Up
## 5 positive regulation of transcription by RNA polymerase II     Up
## 6                                          cellular process     Up

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")
head(cal_down_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0002181            1.561228e-16                        1         67
## 2 GO:0006412            2.328139e-16                        1        123
## 3 GO:0043043            3.132084e-15                        1        125
## 4 GO:0006518            5.575430e-15                        1        145
## 5 GO:0006614            1.285770e-14                        1         48
## 6 GO:0043603            2.192130e-14                        1        170
##   numInCat                                                        term ontology
## 1       90                                     cytoplasmic translation       BP
## 2      219                                                 translation       BP
## 3      231                                peptide biosynthetic process       BP
## 4      285                                   peptide metabolic process       BP
## 5       59 SRP-dependent cotranslational protein targeting to membrane       BP
## 6      359                                     amide metabolic process       BP
##      Module        ParentTerm Factor
## 1 Turquoise       translation   Down
## 2 Turquoise       translation   Down
## 3 Turquoise       translation   Down
## 4 Turquoise       translation   Down
## 5 Turquoise protein transport   Down
## 6 Turquoise       translation   Down

Merge up and down-regulation of calcification GOterms

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated or downregulated for calcification. The list has been further reduced by using the package rrvgo.

all_terms <- rbind(cal_down_terms,cal_up_terms)

all_terms <-  all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","ParentTerm","Module")]

all_terms$GOterm<-as.factor(all_terms$GOterm)

dim(all_terms) #2140 reduced terms 
## [1] 2140   10
length(unique(all_terms$GOterm)) #this represents 2004 unique terms between the two lists of reduced terms
## [1] 2004
length(unique(all_terms$ParentTerm)) #this represents 153 unique terms between the two lists of reduced terms
## [1] 153

Unique terms

This is collapsing the list from 2236 rows to 2004, representing the 2004 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.

goterms_shared <- all_terms %>%
  group_by(GOterm) %>%
  dplyr::summarise(
    ParentTerm = paste(unique(ParentTerm), collapse = ", "),
    Factor = paste(unique(Factor), collapse = ", "),
    term = paste(unique(term), collapse = ", ")
  )
dim(goterms_shared)
## [1] 2004    4
goterms_shared_full <- goterms_shared %>%
  left_join(dplyr::select(all_terms,GOterm, Factor, over_represented_pvalue), by = "GOterm") %>% #select 3 columns GOterm, Factor, over_represented_pvalue from all_terms and left join by GOterm, turns this dataframe from 2004 rows back to the 2236 in all_terms
  mutate(pvalue_Up = ifelse(Factor.y == "Up", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Up factor
  mutate(pvalue_Down = ifelse(Factor.y == "Down", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Down factor
  dplyr::select(-c("over_represented_pvalue", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
  group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
  dplyr::summarize(pvalue_Up = na.omit(pvalue_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
            pvalue_Down = na.omit(pvalue_Down)[1]) %>% #carry over the p-value for the term in the down direction, by taking the first non-NA value.
  rename(Factor.x = "Factor") #rename column 
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm.csv")

plots

goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()

goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")

nrow(goterms_up)
## [1] 1571
nrow(goterms_down)
## [1] 297
nrow(goterms_shared_only)
## [1] 136
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_shared_only) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 145
length(unique(goterms_down$ParentTerm))
## [1] 79
length(unique(goterms_shared_only$ParentTerm))
## [1] 42
freq_fig_up_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "red") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  guides(color = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
freq_fig_down_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "blue") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) 

compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs

ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Shared_ParentTerms.pdf", plot=compare_figs, dpi=300, height=3, units="in", limitsize=FALSE)
## Saving 7 x 3 in image
freq_fig_up_pval <- ggplot(goterms_up, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "red") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  guides(color = FALSE)

freq_fig_down_pval <- ggplot(goterms_down, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "blue") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank())

compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs

ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Unique_ParentTerms.pdf", plot=compare_figs, dpi=300, height=10, units="in", limitsize=FALSE)
## Saving 7 x 10 in image

GOSlim

I downloaded the GOSlim terms list from the following website on 4/5/2024: http://www.informatics.jax.org/gotools/data/input/map2MGIslim.txt

I did so by going to this link and right clicking “here” at “Tab-delimited file mapping GO_ids to MGI GO_Slim category available for download here” and saying “Save link as…” , and I saved it as “map2MGIslim.txt”. I then converted this to CSV and uploaded to this github repository here.

Add slim terms to dataset

GOslim <- read.csv("../../data/map2MGIslim.csv") %>% dplyr::select(-c(term,aspect))
goterms_shared_full_slim <- goterms_shared_full %>%
  inner_join(GOslim, by = c("GOterm" = "GO_id"))

goterms_up_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Up" | Factor=="Down, Up")

goterms_up_full_slim_plot <- goterms_up_slim %>%
      ggplot(aes(x=pvalue_Up, 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Over-represented p-value", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text.y = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
      ); goterms_up_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim.pdf", goterms_up_full_slim_plot, width = 5, height = 35)

goterms_up_full_slim_plot <- goterms_up_slim %>%
      ggplot(aes(x=log10(pvalue_Up), 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Log 10(Over-represented p-value)", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text.y = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
      ); goterms_up_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim_Log.pdf", goterms_up_full_slim_plot, width = 5, height = 35)
goterms_down_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Down" | Factor=="Down, Up")

goterms_down_full_slim_plot <- goterms_down_slim %>%
      ggplot(aes(x=pvalue_Down, 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Over-represented p-value", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
        legend.text=element_text(size = 5),
        legend.title = element_text(size = 5)
      ); goterms_down_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim.pdf", goterms_down_full_slim_plot, width = 5, height = 35)

goterms_down_full_slim_plot <- goterms_down_slim %>%
      ggplot(aes(x=log10(pvalue_Down), 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Log 10(Over-represented p-value)", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
        legend.text=element_text(size = 5),
        legend.title = element_text(size = 5)
      ); goterms_down_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim_Log.pdf", goterms_down_full_slim_plot, width = 5, height = 35)

For each parent term in this list, how many are “Up” by calcification, “Down”, or both

SharedGOterms = # of GO terms within the parent term that are in the list for “Down”, “Up”, or “Down, Up”

result_parent_unique <- goterms_shared %>%
  group_by(ParentTerm,Factor) %>%
  dplyr::summarise(SharedGOterms = n_distinct(GOterm)) %>%
  arrange(-SharedGOterms)
## `summarise()` has grouped output by 'ParentTerm'. You can override using the
## `.groups` argument.
parent_filtered_up <- result_parent_unique %>%
  dplyr::filter(Factor=="Up" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 
parent_filtered_down <- result_parent_unique %>%
  dplyr::filter(Factor=="Down" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 

Count number of GOterms by ParentTerm for the upregulation of calcification

result_up <- cal_up_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term))%>%
  mutate(Calcification.direction = "Up")

head(result_up)
## # A tibble: 6 × 3
##   ParentTerm                              Number.of.terms Calcification.direct…¹
##   <chr>                                             <int> <chr>                 
## 1 Arp2/3 complex-mediated actin nucleati…              13 Up                    
## 2 B cell differentiation                                8 Up                    
## 3 BMP signaling pathway                                19 Up                    
## 4 DNA topological change                                7 Up                    
## 5 G protein-coupled receptor signaling p…               3 Up                    
## 6 Golgi organization                                    7 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
dim(result_up)
## [1] 148   3
merged_up <- parent_filtered_up %>%
  tidyr::separate_rows(ParentTerm, sep = ", ") %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
  left_join(result_up, by = "ParentTerm") #join with result from above to get the Number.of.Terms column

merged_up_clean <- na.omit(merged_up)
head(merged_up_clean)
## # A tibble: 6 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 Arp2/3 complex-me…                   13              13 Up                    
## 2 B cell differenti…                    8               8 Up                    
## 3 BMP signaling pat…                   19              19 Up                    
## 4 DNA topological c…                    7               7 Up                    
## 5 G protein-coupled…                    3               3 Up                    
## 6 Golgi organization                    7               7 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
dim(merged_up_clean)
## [1] 147   4

Count number of GOterms by ParentTerm for the downregulation of calcification

result_down <- cal_down_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term))%>%
  mutate(Calcification.direction = "Down")

dim(result_down)
## [1] 85  3
head(result_down)
## # A tibble: 6 × 3
##   ParentTerm                              Number.of.terms Calcification.direct…¹
##   <chr>                                             <int> <chr>                 
## 1 Arp2/3 complex-mediated actin nucleati…               2 Down                  
## 2 DNA topological change                                2 Down                  
## 3 G protein-coupled receptor signaling p…               2 Down                  
## 4 IRE1-mediated unfolded protein response               5 Down                  
## 5 NAD metabolic process                                 6 Down                  
## 6 NADH regeneration                                     1 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction
merged_down <- parent_filtered_down %>%
  tidyr::separate_rows(ParentTerm, sep = ", ") %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
  left_join(result_down, by = "ParentTerm") 

merged_down_clean<- na.omit(merged_down)
head(merged_down_clean)
## # A tibble: 6 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 Arp2/3 complex-me…                    2               2 Down                  
## 2 DNA topological c…                    2               2 Down                  
## 3 G protein-coupled…                    2               2 Down                  
## 4 IRE1-mediated unf…                    5               5 Down                  
## 5 NAD metabolic pro…                    6               6 Down                  
## 6 NADH regeneration                     1               1 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction

Frequency >10 up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  filter(Number.of.terms>=10) %>%
  filter(Calcification.direction=="Up")
dim(cal_freq_terms_filtered_up)
## [1] 68  4

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image

Frequency >10 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  filter(Number.of.terms>=10)
  #filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 12 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 fatty acid metab…                   12              12 Down                  
##  2 intracellular si…                   12              12 Down                  
##  3 metabolic process                   15              15 Down                  
##  4 negative regulat…                   12              12 Down                  
##  5 peptidyl-serine …                   16              16 Down                  
##  6 protein transport                   16              16 Down                  
##  7 proton motive fo…                   26              26 Down                  
##  8 regulation of pr…                   13              13 Down                  
##  9 regulation of tr…                   17              17 Down                  
## 10 reproduction                        13              13 Down                  
## 11 tricarboxylic ac…                   11              11 Down                  
## 12 ubiquitin-depend…                   26              26 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,27))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs

Frequency >20 up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  filter(Number.of.terms>=20) %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 25 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 IRE1-mediated un…                   41              41 Up                    
##  2 anatomical struc…                   20              20 Up                    
##  3 anatomical struc…                   22              22 Up                    
##  4 cell cycle                          57              57 Up                    
##  5 cell projection …                   21              21 Up                    
##  6 chondroitin sulf…                   24              24 Up                    
##  7 fatty acid metab…                   23              23 Up                    
##  8 innate immune re…                   28              28 Up                    
##  9 intracellular ca…                   24              24 Up                    
## 10 intracellular si…                   33              33 Up                    
## # ℹ 15 more rows
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_gr20.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image

Frequency >20 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  filter(Number.of.terms>=20)
  #filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 2 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 proton motive for…                   26              26 Down                  
## 2 ubiquitin-depende…                   26              26 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,27))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_gr20.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs

All terms up

cal_freq_terms_filtered_up_all <- merged_up_clean %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up_all
## # A tibble: 147 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 Arp2/3 complex-m…                   13              13 Up                    
##  2 B cell different…                    8               8 Up                    
##  3 BMP signaling pa…                   19              19 Up                    
##  4 DNA topological …                    7               7 Up                    
##  5 G protein-couple…                    3               3 Up                    
##  6 Golgi organizati…                    7               7 Up                    
##  7 IRE1-mediated un…                   41              41 Up                    
##  8 L-ascorbic acid …                    3               3 Up                    
##  9 Mo-molybdopterin…                    5               5 Up                    
## 10 NAD metabolic pr…                    4               4 Up                    
## # ℹ 137 more rows
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_ALL.pdf", plot=freq_fig_up, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image

All terms down

cal_freq_terms_filtered_down_all <- merged_down_clean %>%
  filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down_all
## # A tibble: 84 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 Arp2/3 complex-m…                    2               2 Down                  
##  2 DNA topological …                    2               2 Down                  
##  3 G protein-couple…                    2               2 Down                  
##  4 IRE1-mediated un…                    5               5 Down                  
##  5 NAD metabolic pr…                    6               6 Down                  
##  6 NADH regeneration                    1               1 Down                  
##  7 P granule assemb…                    1               1 Down                  
##  8 actin cytoskelet…                    1               1 Down                  
##  9 acyl-CoA metabol…                    1               1 Down                  
## 10 anatomical struc…                    4               4 Down                  
## # ℹ 74 more rows
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,27))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_ALL.pdf", plot=freq_fig_down, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, ncol=2, align="h")
compare_figs